/***********************************************************/
/* Create county-industry (2-digit) year 2000 shares and construct the Bartik Instrument at the county level */
/***********************************************************/
* Base name (without extension) of the county-industry dataset built by this section.
local Data "CountyIndustryEmp_2digit"

noisily display "Obtaining county industrial data..."
noisily display "Verifying that `Data'.dta does not already exist..."
* Probe for the finished dataset. With -capture- a failure does not stop the
* do-file; the return code is left in _rc (601 means the file was not found).
* NOTE(review): the file name is relative to the working directory, while the
* finished dataset is later saved under the LocalData global directory.
* Confirm that the two locations coincide.
capture confirm file `Data'.dta

* If an error is returned, then the commands below are run.
if _rc == 601 {
noi disp "Data not found.  Building data..."
* Load raw employment by county-industry-year.
* (Original note: quarter 2 data are used so the file matches SOD.)
insheet using "$SourceData\US_county_industry_total.csv", clear
noi disp "Finished loading data"
noi disp "$LocalData"

* Keep only the county-industries used in the analysis: drop industries 52 and 92,
* the all-industry aggregate (industry code 0) and state-level rows (geo_level S).
keep if industry!="52" & industry!="92" & geo_level!="S" & industry!="0"
	/*If wanting to restrict to only those counties with material (i.e. non-censored) employment shares for every industry
	gen temp = cond(missing(emp),1,0)
	bysort geography year: egen empty = max(temp)
	keep if empty==0
	drop empty temp
	*/


* Log message corrected: it previously read "county-industy" and "in both 2000",
* although only the single base year 2000 is kept below.
noi disp "Keeping only county-industry pairs that have observations in 2000."

* Restrict to the base year used for the initial shares.
keep if year==2000

* Because industry 52 (and 92) are removed above, the county aggregates in the raw
* file cannot be used; county totals are rebuilt by summing the remaining industries.
* Note: missing (censored) employment is set to 0 by necessity of the construction of
* the Bartik instrument. For smaller counties this is likely a bigger issue when
* computing industry shares.
* If we restricted to counties with reported values for every industry we would be
* left with roughly 300 counties (results are thought to be robust to this), and the
* approach could not be extended to 3-digit NAICS codes.

* County-industry employment (c_i_emp), with missing values treated as zero
noi gen c_i_emp = emp
noi replace c_i_emp = 0 if missing(c_i_emp)
label var c_i_emp "County-Industry Employment"

* Total county employment (c_emp): sum of c_i_emp over the industries in each county
bysort geography: egen c_emp = sum(c_i_emp)
* Fix: the label is attached to c_emp. It was previously applied to c_i_emp, which
* overwrote that variable's label and left c_emp unlabeled.
label var c_emp "Total County Employment"
* Drop counties with no employment; their shares would be undefined (division by zero)
noi drop if c_emp <= 0

/*
** IS THIS NEEDED?
* Generate total national employment (n_emp)
egen n_emp = sum(c_i_emp)
label var n_emp "Total National Employment"
*/
	
* County-industry employment shares for the 2000 base year
generate c_i_share_2000 = c_i_emp / c_emp

* Sanity check: within each county the shares should add up to 1.
* Rounding leaves some sums negligibly below 1 (0.9999999), hence the 0.99 cutoff.
by geography, sort: egen share_sum = sum(c_i_share_2000)
assert share_sum > 0.99
* The helper is only needed for the check above
drop share_sum

* Retain the identifiers, the base-year share and total county employment
keep geography industry c_i_share_2000 c_emp

* County-industry shares excluding mining (21), manufacturing (31-33), and both.
* Fix: manufacturing is stored as the string "31-33" in this data (the same code is
* used when labelling and destringing industries later in this file). The previous
* test against "31" never matched, so manufacturing was not actually excluded.
gen c_i_share_no21_2000 = cond(industry!="21",c_i_share_2000,0)
gen c_i_share_no31_2000 = cond(industry!="31-33",c_i_share_2000,0)
gen c_i_share_no2131_2000 = cond(industry!="21" & industry!="31-33",c_i_share_2000,0)

* County totals of the remaining shares, used to rescale so each county sums to 1
bysort geography: egen total21 = sum(c_i_share_no21_2000)
bysort geography: egen total31 = sum(c_i_share_no31_2000)
bysort geography: egen total2131 = sum(c_i_share_no2131_2000)

* Rescale the retained industries; excluded industries keep a share of 0.
* Note: if a county has no employment outside the excluded industries, its total is 0
* and the rescaled shares become missing.
replace c_i_share_no21_2000 = c_i_share_no21_2000/total21 if industry!="21"
replace c_i_share_no31_2000 = c_i_share_no31_2000/total31 if industry!="31-33"
replace c_i_share_no2131_2000 = c_i_share_no2131_2000/total2131 if industry!="21" & industry!="31-33"


		
*Merging In County Size Shares
* Join by geography
sort geography
* joinby combines the datasets horizontally by forming all pairwise combinations of master and using observations within each county (geography)
joinby geography using "$LocalData\CountyYearFirmSizes.dta"

*Merging In National Industry-Firmsize Growth Rates
* m:1 merge: many rows in memory share each industry-year row of the national file
sort industry year
merge m:1 industry year using "$LocalData\NationalIndustryFirmGrowth2digit.dta"
* NOTE(review): _merge is dropped without being inspected, so unmatched rows from either file remain in memory - confirm that this is intended
drop _merge
		
	
****************************************************************************************
*Construction of Bartik Instrument******************************************************
****************************************************************************************
	
* Bartik instrument: national industry series by firm size, averaged within each
* county-year using the 2000 county-industry shares as weights.
noi disp "Using asgen to generate industry growth, by size, weighted by county-industry shares."

foreach span of numlist 1 3 5 15 {
	* National series for this horizon. Suffix _5 feeds the L variables and suffix _1
	* the S variables (presumably large and small firms - confirm against the
	* national growth file).
	local grow_lg g_nat_ind_size`span'_5
	local grow_sm g_nat_ind_size`span'_1
	local dshr_lg d_nat_ind_share`span'_5
	local dshr_sm d_nat_ind_share`span'_1

	* Weighted by the full set of base-year shares
	bysort geography year: asgen bartikGL`span' = `grow_lg', weight(c_i_share_2000)
	bysort geography year: asgen bartikGS`span' = `grow_sm', weight(c_i_share_2000)
	bysort geography year: asgen bartikL`span' = `dshr_lg', weight(c_i_share_2000)
	bysort geography year: asgen bartikS`span' = `dshr_sm', weight(c_i_share_2000)

	* Same size-1 growth series, weighted by shares that leave out industry 21,
	* industry 31, or both
	foreach excl in 21 31 2131 {
		bysort geography year: asgen bartikGS`excl'_`span' = `grow_sm', weight(c_i_share_no`excl'_2000)
	}
}

noi disp "Obtaining industry match data..."
noi disp "Verifying that this data does not already exist..."
* Probe for an existing IndustryMatch.dta; a return code of 601 means it was not found.
* The lookup table is built only in that case, otherwise a message is shown.
* NOTE(review): the probe uses a path relative to the working directory, whereas the
* file is saved under the LocalData global directory - confirm these coincide.
capture confirm file IndustryMatch.dta

if _rc == 601 {
	noi disp "Data not found.  Building data..."
	noi disp "Building IndustryMatch.dta..."
	* Work on a copy; the county-industry data in memory are restored afterwards
	preserve

	* One row per distinct industry code, with a numeric group id (ind)
	egen ind = group(industry)
	keep ind industry
	bysort ind: keep if _n == 1

	* Descriptive name for each 2-digit code; codes not listed keep an empty label
	gen IndLabel = ""
	replace IndLabel = "Agriculture, Forestry, Fishing, Hunting" if industry == "11"
	replace IndLabel = "Mining, Quarrying, Gas Extraction" if industry == "21"
	replace IndLabel = "Utilities" if industry == "22"
	replace IndLabel = "Construction" if industry == "23"
	replace IndLabel = "Manufacturing" if industry == "31-33"
	replace IndLabel = "Wholesale Trade" if industry == "42"
	replace IndLabel = "Retail Trade" if industry == "44-45"
	replace IndLabel = "Transportation and Warehousing" if industry == "48-49"
	replace IndLabel = "Information" if industry == "51"
	replace IndLabel = "Real Estate, Rental, Leasing" if industry == "53"
	replace IndLabel = "Prof., Scientific, Tech Services" if industry == "54"
	replace IndLabel = "Mgmt of Companies and Enterprises" if industry == "55"
	replace IndLabel = "Admin and Support and Waste Mgmt" if industry == "56"
	replace IndLabel = "Education" if industry == "61"
	replace IndLabel = "Health Care, Social Assistance" if industry == "62"
	replace IndLabel = "Arts, Entertainment, Recreation" if industry == "71"
	replace IndLabel = "Accommodation and Food Serv" if industry == "72"
	replace IndLabel = "Other (non Public Admin)" if industry == "81"

	* Store the current copy and an archive copy
	compress
	save "$LocalData\IndustryMatch.dta", replace
	save "$LocalData\Archive\IndustryMatch`CurrentDate'.dta", replace
	restore
}
else {
	noi disp "Data already exists."
}
noi etime
noi disp " "


noi disp "Obtaining initial shares data..."
noi disp "Verifying that this data does not already exist..."
* Probe for an existing InitialShares2000_2digit.dta; a return code of 601 means the
* file was not found, and only then is it built.
* NOTE(review): the probe uses a path relative to the working directory, whereas the
* file is saved under the LocalData global directory - confirm these coincide.
capture confirm file InitialShares2000_2digit.dta

if _rc == 601 {
	noi disp "Data not found.  Building data..."
	noi disp "Building InitialShares2000_2digit.dta..."

	* Build a wide county-year file in which the initial shares and the national
	* industry firm-size growth rates appear as one column per industry.
	* The data in memory are restored once the file has been saved.
	preserve
	keep year geography c_i_share_2000 industry g_nat_ind_size* d_nat_ind_share1_1 d_nat_ind_share1_5
	drop if industry==" " | industry=="52"
	replace c_i_share_2000 = 0 if missing(c_i_share_2000)

	* Add a trailing underscore so the industry code appended by reshape is
	* separated from the stub name
	rename g_nat_ind_size1_1 g_nat_ind_size1_1_
	rename g_nat_ind_size3_1 g_nat_ind_size3_1_
	rename g_nat_ind_size5_1 g_nat_ind_size5_1_
	rename g_nat_ind_size15_1 g_nat_ind_size15_1_

	rename g_nat_ind_size1_5 g_nat_ind_size1_5_
	rename g_nat_ind_size3_5 g_nat_ind_size3_5_
	rename g_nat_ind_size5_5 g_nat_ind_size5_5_
	rename g_nat_ind_size15_5 g_nat_ind_size15_5_

	* The 1-year share changes for size classes 1 and 5 become the stubs
	* d_nat_ind_share1_ and d_nat_ind_share5_
	rename d_nat_ind_share1_1 d_nat_ind_share1_
	rename d_nat_ind_share1_5 d_nat_ind_share5_

	* Recode ranged NAICS 2-digit codes so industry can be destrung
	* (reshape needs a numeric j variable unless the string option is given)
	replace industry="31" if industry=="31-33"
	replace industry="44" if industry=="44-45"
	replace industry="48" if industry=="48-49"
	destring industry, replace
	keep if !missing(industry)
	reshape wide c_i_share_2000 g_nat_ind_size* d_nat_ind_share*, i(geography year) j(industry)

	* Initial shares are fixed within a county: fill gaps from the county's own
	* value, and use 0 where the county has no value for that industry
	foreach cshare of varlist c_i_share_2000* {
		bysort geography: egen temp = max(`cshare')
		replace `cshare' = temp if missing(`cshare')
		drop temp
		replace `cshare' = 0 if missing(`cshare')
	}

	* National series are common to all counties in a year: fill gaps from the
	* value observed that year, and use 0 where no value exists.
	* Fix: the share-change columns were previously addressed by their names from
	* before the rename and reshape (d_nat_ind_share1_1 and d_nat_ind_share1_5 plus a
	* wildcard). That pattern matched only a few of the share1 columns through
	* variable abbreviation and none of the share5 columns, so most were never filled.
	foreach g of varlist g_nat_ind_size* d_nat_ind_share* {
		bysort year: egen temp = max(`g')
		replace `g' = temp if missing(`g')
		drop temp
		replace `g' = 0 if missing(`g')
	}

	* Store the current copy and an archive copy
	compress
	save "$LocalData\InitialShares2000_2digit.dta", replace
	save "$LocalData\Archive\InitialShares2000_2digit_`CurrentDate'.dta", replace

	restore
}
else {
	noi disp "Data already exists."
}
noi etime
noi disp " "

		
/******************************************************************************/
/* Generating County Industrial Dataset                                       */
/******************************************************************************/


	* Reduce the county-industry-year rows to a single row per county-year
	bysort geography year: keep if _n == 1
	* Keep the county-year identifiers, total county employment, the Bartik
	* instruments and the firm-size variables
	keep geography year c_emp bartikGS* bartikGL* bartikS* bartikL* gSmallFirm* gLargeFirm* dSmallShare* dLargeShare* emp_size1 emp_size5

	sort geography year
	compress

	* Attach the wide initial-share and national-growth columns for each county-year
	merge 1:1 geography year using "$LocalData\InitialShares2000_2digit.dta"
	drop _merge

	* Store the current copy and an archive copy
	compress
	save "$LocalData\\`Data'.dta", replace
	save "$LocalData\Archive\\`Data'`CurrentDate'.dta", replace

}
* This runs if no error was returned.
else noi disp "Data already exists."
noi etime
noi disp " "
